import pandas as pd

# Read the job-info CSV.
df = pd.read_csv('final_jobinfo.csv')

# Strip the trailing "K" unit from both salary bounds (e.g. "15K" -> "15")
# and convert the two columns to float.
salary = df[['最低工资','最高工资']].replace(regex={r'(\d+)K':r'\1'}).astype("float")

# Average of the lower and upper bound for every row, computed column-wise.
# A row-wise ``apply(..., axis=1)`` gives the same numbers but returns a
# DataFrame (not a Series) when the file has no data rows, which breaks any
# code that iterates over the averages; it also calls Python once per row.
salary_avg = (salary['最低工资'] + salary['最高工资']) / 2

# Count rows per average-salary band: 0-10K (inclusive), 10-25K (above 10,
# up to and including 25) and above 25K.
salary_dic = {'0-10K':0,'10-25K':0,'25K以上':0}
for avg in salary_avg:
    if 0 <= avg <= 10:
        salary_dic['0-10K'] += 1
    elif 10 < avg <= 25:
        salary_dic['10-25K'] += 1
    elif avg > 25:
        salary_dic['25K以上'] += 1
    # Anything else is NaN (missing salary) or negative: every comparison
    # above is False for those values, so they are left out instead of being
    # miscounted as '25K以上'.

# Total number of rows that landed in one of the bands.
count = sum(salary_dic.values())

# Replace each count with its percentage of the total, rounded to two
# decimals. With no valid rows every band is reported as 0.0 instead of
# raising ZeroDivisionError.
for key in salary_dic:
    share = (salary_dic[key] / count) * 100 if count else 0.0
    salary_dic[key] = round(share, 2)

print(salary_dic)
# Generate the pie chart.
from pyecharts import options as opts
from pyecharts.charts import Pie

# One [band label, percentage] pair per slice, in dictionary order.
slices = [[band, pct] for band, pct in salary_dic.items()]

pie = Pie()
pie.add("", slices)
pie.set_global_opts(title_opts=opts.TitleOpts(title="大数据岗位薪资水平分布"))
# Label template: ECharts substitutes the slice name for {b} and its value
# for {c}; the trailing "%" is literal text.
pie.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}%"))
# Write the chart to an HTML file; the value returned by render() is kept
# in ``c``.
c = pie.render("大数据岗位薪资水平分布.html")